#Althoff IFN data processing
This analysis is to look at why Scribble KO HSC are insensitive to activating signals from interferon. To understand this WT and Scibble KO mice are treated or not with PolyIC an interferon stimulator. There are 4 groups here with WT, KO, WTIC, and KOIC. There are 3 mice with untreated condition and 4 mice per treated condition.
library(ggplot2)
library(DESeq2)
library(tximport)
library(readr)
library(tximportData)
library(readxl)
library(knitr)
library(tidyverse)
library(pheatmap)
library(RColorBrewer)
library(viridis)
library(ggrepel)
library(EnhancedVolcano)
library(fgsea)
library(limma)
library(VennDiagram)
library(UpSetR)
library(wesanderson)
library(kableExtra)
library(reshape)
library(dplyr)
library(msigdbr)
| Sample | Condition |
|---|---|
| WT_rep1 | WT |
| WT_rep2 | WT |
| WT_rep3 | WT |
| KO_rep1 | KO |
| KO_rep2 | KO |
| KO_rep3 | KO |
| WTIC_rep1 | WTIC |
| WTIC_rep2 | WTIC |
| WTIC_rep3 | WTIC |
| WTIC_rep4 | WTIC |
| KOIC_rep1 | KOIC |
| KOIC_rep2 | KOIC |
| KOIC_rep3 | KOIC |
| KOIC_rep4 | KOIC |
We see high correlation between all samples that were treated regardless of phenotype. In the untreated samples there was less correlation this may be due to the mouse variability.
This doesn’t seem to match the correlation data which indicated that the treated samples were more similar to each other.
## [1] TRUE
## [1] TRUE
## using just counts from tximport
## using pre-existing size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## using pre-existing size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## [1] 4437
## [1] 1423
## [1] 15
## [1] 4481
kable(significant_genes)
| comparison | sig.genes |
|---|---|
| WTvsWTIC | 4437 |
| WTvKO | 1423 |
| WTICvKOIC | 15 |
| KOvKOIC | 4481 |
DEG_WTvKO<-as.data.frame(res_WTvKO) %>%
filter(padj<.01 & abs(log2FoldChange) >= .5)%>%
rownames_to_column(var="ensembl_gene_id")%>%
left_join(ens2gene,by="ensembl_gene_id")%>%
select(Gene, padj, pvalue, log2FoldChange)%>%
arrange(padj)
DEG_WTvWTIC<-as.data.frame(res_WTvWTIC)%>%
filter(padj<.01 & abs(log2FoldChange) >= .5)%>%
rownames_to_column(var="ensembl_gene_id")%>%
left_join(ens2gene,by="ensembl_gene_id")%>%
select(Gene, padj, pvalue, log2FoldChange)%>%
arrange(padj)
DEG_WTICvKOIC<-as.data.frame(res_WTICvKOIC)%>%
filter(padj<.01 & abs(log2FoldChange) >= .5)%>%
rownames_to_column(var="ensembl_gene_id")%>%
left_join(ens2gene,by="ensembl_gene_id")%>%
select(Gene, padj, pvalue, log2FoldChange)%>%
arrange(padj)
DEG_KOvKOIC<-as.data.frame(res_KOvKOIC)%>%
filter(padj<.01 & abs(log2FoldChange) >= .5)%>%
rownames_to_column(var="ensembl_gene_id")%>%
left_join(ens2gene,by="ensembl_gene_id")%>%
select(Gene, padj, pvalue, log2FoldChange)%>%
arrange(padj)
plotDispEsts(dds.filtered)
## Warning: Removed 4870 rows containing missing values (geom_point).
## Warning: Removed 3763 rows containing missing values (geom_point).
## Warning: Removed 3763 rows containing missing values (geom_point).
## Warning: Removed 1479 rows containing missing values (geom_point).
## Warning: Unknown or uninitialised column: `Mouse_genotype`.
top_20_WTvKO<-DEG_WTvKO[1:20, 1]
top_20_WTvWTIC<-DEG_WTvWTIC[1:20, 1]
top_20_KOvKOIC<-DEG_KOvKOIC[1:20, 1]
top_20_WTICvKOIC<-DEG_WTvWTIC[1:20, 1]
normalized_counts <- counts(dds.filtered, normalized=T)
normalized_counts<-as.data.frame(normalized_counts)
normalized_counts<-rownames_to_column(normalized_counts, var="ensembl_gene_id")
normalized_counts<-inner_join(normalized_counts, ens2gene, by="ensembl_gene_id")
top20_norm_counts_WTvKO <- filter(normalized_counts, Gene %in% top_20_WTvKO)%>%
select(Gene, WT_rep1, WT_rep2, WT_rep3, KO_rep1, KO_rep2, KO_rep3)
## stopped here there is something wrong
melted_top20_norm_counts_WTvKO <- data.frame(melt(top20_norm_counts_WTvKO))
## Using Gene as id variables
colnames(melted_top20_norm_counts_WTvKO)<-c("Gene", "Sample", "normalized_counts")
melted_top20_norm_counts_WTvKO<-full_join(melted_top20_norm_counts_WTvKO,metadata, by="Sample")
melted_top20_norm_counts_WTvKO<-filter(melted_top20_norm_counts_WTvKO, Condition!=c("WTIC", "KOIC"))
ggplot(melted_top20_norm_counts_WTvKO) +
geom_point(aes(x = Gene, y = normalized_counts, color = Condition)) +
scale_y_log10() +
xlab("Genes") +
ylab("Normalized Counts") +
ggtitle("Top 20 Significant DE Genes") +
theme_bw() +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
theme(plot.title=element_text(hjust=0.5))
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 4 rows containing missing values (geom_point).
kable(top20_norm_counts_WTvKO)
| Gene | WT_rep1 | WT_rep2 | WT_rep3 | KO_rep1 | KO_rep2 | KO_rep3 |
|---|---|---|---|---|---|---|
| P2rx5 | 90.36663 | 0.00000 | 0.0000 | 0.000000 | 0.000000 | 0.0000 |
| Ldhc | 0.00000 | 0.00000 | 0.0000 | 0.000000 | 0.000000 | 100.3481 |
| Gm5884 | 0.00000 | 130.99230 | 0.0000 | 0.000000 | 0.000000 | 0.0000 |
| Mmgt2 | 822.58968 | 809.62709 | 775.7687 | 244.176446 | 211.743819 | 219.8535 |
| Olfr248 | 0.00000 | 0.00000 | 0.0000 | 186.236272 | 0.000000 | 0.0000 |
| Gm3086 | 0.00000 | 0.00000 | 0.0000 | 0.000000 | 0.000000 | 135.0137 |
| Gm14817 | 93.74482 | 0.00000 | 0.0000 | 0.000000 | 0.000000 | 0.0000 |
| Gm11716 | 95.43392 | 0.00000 | 0.0000 | 0.000000 | 0.000000 | 0.0000 |
| Gm17022 | 96.27846 | 0.00000 | 0.0000 | 0.000000 | 0.000000 | 0.0000 |
| Gm9903 | 0.00000 | 83.64568 | 0.0000 | 0.000000 | 0.000000 | 0.0000 |
| Gm28539 | 0.00000 | 226.47463 | 0.0000 | 0.000000 | 0.000000 | 0.0000 |
| Gm37571 | 0.00000 | 0.00000 | 0.0000 | 0.000000 | 115.875555 | 0.0000 |
| Gm38289 | 0.00000 | 0.00000 | 0.0000 | 0.000000 | 211.743819 | 0.0000 |
| Gm19148 | 0.00000 | 0.00000 | 0.0000 | 0.000000 | 0.000000 | 191.5736 |
| 1700018A23Rik | 0.00000 | 0.00000 | 114.0614 | 0.000000 | 0.000000 | 0.0000 |
| Gm43936 | 247.45254 | 0.00000 | 0.0000 | 0.000000 | 0.000000 | 0.0000 |
| Gm44108 | 0.00000 | 0.00000 | 0.0000 | 0.000000 | 0.000000 | 178.8020 |
| Gm45399 | 121.61490 | 170.44781 | 358.0471 | 4.966301 | 3.334548 | 0.0000 |
| Gm49369 | 0.00000 | 0.00000 | 0.0000 | 137.400983 | 0.000000 | 0.0000 |
| Gm47502 | 0.00000 | 0.00000 | 0.0000 | 0.000000 | 192.570166 | 0.0000 |
ens2gene <- unique(ens2gene)
#pulls the hallmark gene set for mouse
h_gene_sets_mouse = msigdbr(species = "mouse", category = "H")
mouse.hallmark.list = base::split(x = h_gene_sets_mouse$gene_symbol, f = h_gene_sets_mouse$gs_name)
#tidying data
WTvKO_gsea<-results(dds.filtered, contrast=c("Condition", "WT", "KO"),tidy=TRUE)
colnames(WTvKO_gsea)[1]<-"ensembl_gene_id"
WTvKO_gsea<-inner_join(WTvKO_gsea, ens2gene, by="ensembl_gene_id")
WTvKO_gsea_sum<-WTvKO_gsea%>%
dplyr::select(Gene, stat) %>%
na.omit() %>%
distinct() %>%
group_by(Gene) %>%
summarise(stat=mean(stat))
rank_posWTvKO<-deframe(WTvKO_gsea_sum)
fgsea_posWTvKO_hallmark<- fgsea(pathways=mouse.hallmark.list, stats=rank_posWTvKO)
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (0.26% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
fgseaRes_posWTvKO_tidy <-fgsea_posWTvKO_hallmark%>%
as_tibble() %>%
arrange(desc(NES))
WTvWTIC_gsea<-results(dds.filtered, contrast=c("Condition", "WT", "WTIC"), tidy=TRUE)
colnames(WTvWTIC_gsea)[1]<-"ensembl_gene_id"
WTvWTIC_gsea<-inner_join(WTvWTIC_gsea, ens2gene, by="ensembl_gene_id")
WTvWTIC_gsea_sum<-WTvWTIC_gsea%>%
dplyr::select(Gene, stat) %>%
na.omit() %>%
distinct() %>%
group_by(Gene) %>%
summarise(stat=mean(stat))
rank_posWTvWTIC<-deframe(WTvWTIC_gsea_sum)
fgsea_posWTvWTIC_hallmark<- fgsea(pathways=mouse.hallmark.list, stats=rank_posWTvWTIC)
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (0.15% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## Warning in fgseaMultilevel(...): For some pathways, in reality P-values are less
## than 1e-10. You can set the `eps` argument to zero for better estimation.
fgseaRes_posWTvWTIC_tidy <-fgsea_posWTvWTIC_hallmark%>%
as_tibble() %>%
arrange(desc(NES))
KOvKOIC_gsea<-results(dds.filtered, contrast=c("Condition","KO", "KOIC"),tidy=TRUE)
colnames(KOvKOIC_gsea)[1]<-"ensembl_gene_id"
KOvKOIC_gsea<-inner_join(KOvKOIC_gsea, ens2gene, by="ensembl_gene_id")
KOvKOIC_gsea_sum<-KOvKOIC_gsea%>%
dplyr::select(Gene, stat) %>%
na.omit() %>%
distinct() %>%
group_by(Gene) %>%
summarise(stat=mean(stat))
rank_posKOvKOIC<-deframe(KOvKOIC_gsea_sum)
fgsea_posKOvKOIC_hallmark<- fgsea(pathways=mouse.hallmark.list, stats=rank_posKOvKOIC)
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (0.13% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : For some pathways, in reality P-values are less than 1e-10. You can set the `eps` argument to zero for better estimation.
fgseaRes_posKOvKOIC_tidy <-fgsea_posKOvKOIC_hallmark%>%
as_tibble() %>%
arrange(desc(NES))
WTICvKOIC_gsea<-results(dds.filtered, contrast=c("Condition", "WTIC", "KOIC"),tidy=TRUE)
colnames(WTICvKOIC_gsea)[1]<-"ensembl_gene_id"
WTICvKOIC_gsea<-inner_join(WTICvKOIC_gsea, ens2gene, by="ensembl_gene_id")
WTICvKOIC_gsea_sum<-WTICvKOIC_gsea%>%
dplyr::select(Gene, stat) %>%
na.omit() %>%
distinct() %>%
group_by(Gene) %>%
summarise(stat=mean(stat))
rank_posWTICvKOIC<-deframe(WTICvKOIC_gsea_sum)
fgsea_posWTICvKOIC_hallmark<- fgsea(pathways=mouse.hallmark.list, stats=rank_posWTICvKOIC)
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (0.18% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : For some pathways, in reality P-values are less than 1e-10. You can set the `eps` argument to zero for better estimation.
fgseaRes_posWTICvKOIC_tidy <-fgsea_posWTICvKOIC_hallmark%>%
as_tibble() %>%
arrange(desc(NES))